---
title: 'Purchase: quarter X year dummies no duration since peak'
author: 'David Hume'
date: '`r format(Sys.time(), "%d %B %Y")`'
output: 
   html_document:
    toc: true
    toc_depth: 3 
editor_options: 
  chunk_output_type: console
---
---

```{r setup, message=FALSE, include = FALSE}
library(tidyverse)
library(data.table)
library(Hmisc)
library(fasttime)
library(lubridate)
library(stringr)
library(car)
library(stargazer)
library(lfe)
library(plm)
library(knitr)
library(pryr)
library(broom)
library(DescTools)
options(width=200, scipen=10)
knitr::opts_chunk$set(echo = TRUE, cache = FALSE, warning = FALSE, 
                      message = FALSE, cache.lazy = FALSE)

```

<div style="line-height: 2em;">
<font size="4"> 

---

```{r echo = FALSE, eval=TRUE}

Time <- Sys.time()

# Set-up

# Switches & setting of variables

# Data set either "All8pc", " "All1pc", "Test", "Render"
analysis.1 <- "Render"
# Filetype for output
# "latex" 
# "html" - inserting into html [htmltools::includeHTML()] or Word docs [Insert/[Text]/Object/Text from file]
# "text" - viewing on screen
filetype.1 <- "Render"


analysis <- ifelse(analysis.1 =="Render", readRDS("./Data/Analysis.rds"), analysis.1)
filetype <- ifelse(filetype.1 =="Render", readRDS("./Data/Filetype.rds"), filetype.1)


if (filetype=="latex") {
  out <- "tex"

  } else if (filetype=="html") {
      out <- "html"

    } else if (filetype=="text") {
      out <- "csv"

      } else {
        stop("latex, html or text")
}



# Set variables
fig = 0 # Counter for figures

```

```{r cache=FALSE, eval=TRUE, echo=FALSE}

Keep_cols <- c("Property_Id", "Type", "New", "Quality100000", "Region", "date.val", "YrQtr", "sold.dummy", "IdxPriceA", "PriceA", "DurationVal",  "Transactions_band", "Stock_band", "LTV_band", "Peak_price")




#if (analysis.1!="Render") {
  if (analysis=="All") {
    f <- function(x, pos) subset(x, Type == Type) # i.e. reads in all data because condition always TRUE
      Resold.indexed <-  read_csv_chunked("./Data/Resold.indexed.All.csv", DataFrameCallback$new(f), chunk_size = 2000000)
      Resold.indexed <- as.data.table(Resold.indexed)
      Resold.indexed.regress <- Resold.indexed[, ..Keep_cols]
      Resold.indexed <- NULL
      
      } else {
        Resold.indexed.regress <-fread(paste0("./Data/Resold.indexed.", analysis, ".csv"), select = Keep_cols)
  }
#}



Resold.indexed.regress <- Resold.indexed.regress[,`:=`(Type=factor(Type), 
                                                         New=factor(New), 
                                                         YrQtr=factor(YrQtr),
                                                         Region=factor(Region),
                                                         Property_Id = factor(Property_Id),
                                                         date.val=as.Date(date.val, origin ="1970-01-01"),
                                                       Val_year=factor(year(date.val)),
                                                         Val_qtr=factor(quarter(date.val)))][
                                                           ,`:=`(Type = relevel(Type, "Flat"), New = relevel(New, "Old"))]



cat("DATA:", analysis)

```







```{r cache=FALSE,  echo= FALSE}



regression_variables <- function(dt, Ref_price, Ref_duration, return) {
  # NB Ref_price needs quotes
  DT <- dt[, .(Ref_price=get(Ref_price), Duration = get(Ref_duration), IdxPriceA, PriceA, Purch_duration = DurationVal, 
               sold.dummy, Property_Id, YrQtr, Val_year, Val_qtr, Type, New, Quality100000, Region)]
  
  DT[, F_unrealgain :=round(IdxPriceA-Ref_price) # Calculates unrealised gain or loss - price paid vs index
     ][, `:=`(F_unrealgainAdj = Winsorize(F_unrealgain/100000, probs = c(0.01, 0.99), na.rm = TRUE),
              F_unrealgainYN = F_unrealgain >=0,
              Price_unrealgainAdj = Winsorize((IdxPriceA-PriceA)/100000, probs = c(0.01, 0.99), na.rm = TRUE),
              Price_unrealgainYN = ifelse(IdxPriceA-PriceA>=0, TRUE, FALSE))
       ][, `:=`(Purch_duration2 = Purch_duration^2,
                         Purch_duration3 = Purch_duration^3,
                         Purch_duration4 = Purch_duration^4,
                         Purch_duration5 = Purch_duration^5)]
  
  
  
  if (return=="P") {
   DT[, `:=`(F_unrealgain.pos = ifelse(F_unrealgainYN,10000000*F_unrealgainAdj/Ref_price,0),
                       F_unrealgain.neg = ifelse(!F_unrealgainYN,10000000*F_unrealgainAdj/Ref_price,0),
                       Price_unrealgain.pos = ifelse(Price_unrealgainYN,10000000*Price_unrealgainAdj/PriceA,0),
                       Price_unrealgain.neg = ifelse(!Price_unrealgainYN,10000000*Price_unrealgainAdj/PriceA,0))]
  
    } else if (return=="L") {
        DT[, `:=`(F_unrealgain.pos = ifelse(F_unrealgainYN,F_unrealgainAdj,0),
                       F_unrealgain.neg = ifelse(!F_unrealgainYN,F_unrealgainAdj,0),
                       Price_unrealgain.pos = ifelse(Price_unrealgainYN,Price_unrealgainAdj,0),
                       Price_unrealgain.neg = ifelse(!Price_unrealgainYN,Price_unrealgainAdj,0))]
    } else if (return=="NA") {
        DT<-DT
      } else {
        stop("L, P or NA")
        }

  
  DT  <- DT[, c("Ref_price", "IdxPriceA", "PriceA", "Price_unrealgainAdj", "F_unrealgainAdj", "F_unrealgain"):= NULL]

  }


DP4 <- function(x){as.character(format(round(x, 4),nsmall= 4)[1])}

BR4 <- function(x){paste0("(",DP4(x), ")")}
  
MS_table <- function(object, sample){
    
    result <- data.table(tidy(object)) # requires library broom
    result <- result[, stars:= ifelse(p.value<.01, "***", ifelse(p.value<.05, "**", ifelse(p.value<.1, "*", "")))]
    MS_summary <- fread(paste0("./Tables/MS_summary_raw.", analysis, ".csv"))
    
    if (sample=="Sales_high") {
      MS_summary <- rbind(MS_summary, data.table(Description="Liquidity (Sales)", Purchase_coeff="",  Purchase_se="", Constant_coeff="", Constant_se=""))
      Next_desc <- "Above Median"
    } else if (sample=="Stock_high") {
      MS_summary <- rbind(MS_summary, data.table(Description="Size (Stock)", Purchase_coeff="",  Purchase_se="", Constant_coeff="", Constant_se=""))
      Next_desc <- "Above Median"
    } else if (sample=="LTV_high") {
      MS_summary <- rbind(MS_summary, data.table(Description="Leverage (LTV%)", Purchase_coeff="",  Purchase_se="",  Constant_coeff="", Constant_se=""))
      Next_desc <- "Above Median"
    } else if (sample=="TimePurchase_high") {
      MS_summary <- rbind(MS_summary, data.table(Description="Time Since Purchase", Purchase_coeff="",  Purchase_se="",  Constant_coeff="", Constant_se=""))
      Next_desc <- "Above Median"
    } else {Next_desc <- "Below Median"}
  
    MS_summary <- rbind(MS_summary, data.table(Description=Next_desc,
                                        Purchase_coeff=paste0(DP4(result[term=="Price_unrealgainYNTRUE",.(estimate)]),result[term=="Price_unrealgainYNTRUE",.(stars)]),
                                               Purchase_se=BR4(result[term=="Price_unrealgainYNTRUE",.(std.error)]),
                                               Constant_coeff=paste0(DP4(result[term=="(Intercept)",.(estimate)]),result[term=="(Intercept)",.(stars)]), 
                                               Constant_se=BR4(result[term=="(Intercept)",.(std.error)])))
    
    fwrite(MS_summary, paste0("./Tables/MS_summary_raw.", analysis, ".csv"))
  }
  






Purchase_ols <- function(dt, Ref_price, Ref_duration, title, sample){
  
  # title e.g. "Purchase", "Peak", "Neighbour"
  # sample e.g. "Incl3yr", "Excl3yr"
  # format short (excl property characteristics) or long

  return <- "NA"
  
  DT <- regression_variables(dt, Ref_price, Ref_duration, return)


ols_m2 <- felm(sold.dummy ~ Price_unrealgainYN  + Purch_duration + Purch_duration2 +  Purch_duration3 + Purch_duration4 + Purch_duration5 + Type + New + Quality100000 + Region| 0|0|  Property_Id + YrQtr , data =  DT, na.action=na.exclude)

ols_m1 <- felm(sold.dummy ~ Price_unrealgainYN |  0|0|  Property_Id + YrQtr, data =  DT, na.action=na.exclude)


Covariate_Labels <- c("Gain Since Purchase = 1", "Years From Purchase", "Detached = 1", "Semi-detached = 1","Terraced = 1","New-build = 1", "Quality (£100,000)")


table <- stargazer(ols_m1, ols_m2,  no.space=TRUE, align=TRUE, digits = 4, digits.extra = 0,  dep.var.caption  = "",
            dep.var.labels = "Sale", object.names=FALSE, model.numbers=FALSE,  model.names=FALSE, column.labels = c("(1)", "(2)"), 
              covariate.labels=Covariate_Labels,
            add.lines = list(c("Years From Purchase Quintics", "NO", "YES"), 
                             c( "Region", "NO", "YES")),
              omit.stat=c("ser","f","adj.rsq"), omit=c("Purch_duration2", "Purch_duration3", "Purch_duration4", "Purch_duration5", "Region"), 
              float = F, omit.table.layout = "n", column.sep.width = "-15pt", table.layout="-dc#-tas-",
              type = "latex")


table <- str_replace_all(table, "\\^", "")
table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")
write.table(table[11:(length(table)-2)], col.names = F, row.names = F, quote = FALSE, paste0("./Tables/LaTeX/", title, sample, analysis, return, "Clean_ols.tex"))
  
stargazer(ols_m1, ols_m2,  no.space=TRUE, align=TRUE, digits = 4, digits.extra = 0,  dep.var.caption  = "",
            dep.var.labels = "Sale", object.names=FALSE, model.numbers=FALSE,  model.names=FALSE, column.labels = c("(1)", "(2)"), 
              covariate.labels=Covariate_Labels,
            add.lines = list(c("Years From Purchase Quintics", "NO", "YES"), 
                             c( "Property Characteristics", "NO", "YES")),
              omit.stat=c("ser","f","adj.rsq"), omit=c("Purch_duration2", "Purch_duration3", "Purch_duration4", "Purch_duration5", "Region"), 
              font.size = "footnotesize", omit.table.layout = "n", column.sep.width = "-15pt", 
              type = filetype, out=paste0("./Tables/", title, sample, analysis, return, "_ols.", out))
    

}


Purchase_both <- function(dt, Ref_price, Ref_duration, title, sample, return){
  
  # title e.g. "Purchase", "Peak", "Neighbour"
  # sample e.g. "Incl3yr", "Excl3yr"
  # format short (excl property characteristics) or long
  
  
  DT <- regression_variables(dt, Ref_price, Ref_duration, return)
  
  ols_m1 <- felm(sold.dummy ~ Price_unrealgainYN  + Price_unrealgain.pos + Price_unrealgain.neg  + Purch_duration + Purch_duration2 +  Purch_duration3 + Purch_duration4 + Purch_duration5 + Type + New + Quality100000 + Region| 0|0|  Property_Id + YrQtr , data =  DT, na.action=na.exclude)
  
  
  DT  <- DT[, c("Type", "New", "Quality100000", "Region"):= NULL]

  
ols_m2 <- felm(sold.dummy ~ Price_unrealgainYN + Price_unrealgain.pos + Price_unrealgain.neg  + Purch_duration + Purch_duration2 +  Purch_duration3 + Purch_duration4 + Purch_duration5 | Property_Id + YrQtr|0|  Property_Id + YrQtr , data =  DT, na.action=na.exclude)



 if (return=="P") {
Covariate_Labels <- c("Gain Since Purchase = 1", "Return Since Purchase > 0 (%)",  "Return Since Purchase < 0 (%)",  "Years From Purchase")
  
    } else if (return=="L") {
Covariate_Labels <- c("Gain Since Purchase = 1", "Return Since Purchase > 0 (£100,000)",  "Return Since Purchase < 0 (£100,000)", "Years From Purchase")
} else {
        stop("L or P")
        }



table <- stargazer(ols_m1, ols_m2, no.space=TRUE, align=TRUE, digits = 4, digits.extra = 0,  dep.var.caption  = "",
            dep.var.labels = "Sale", object.names=FALSE, model.numbers=FALSE,  model.names=FALSE, column.labels = c("(1)", "(2)", "(3)"), 
              covariate.labels=Covariate_Labels,
            add.lines = list(c("Years From Purchase Quintics", "YES", "YES"),
                             c("Property Characteristics", "YES","NO"),
                             c( "Property FE", "NO", "YES"), 
                             c( "Quarter FE", "NO", "YES")),
              omit.stat=c("ser","f","adj.rsq"), omit=c("Purch_duration2", "Purch_duration3", "Purch_duration4", "Purch_duration5", "Type", "New", "Quality100000", "Region"), 
              float = F, omit.table.layout = "n", column.sep.width = "-15pt", table.layout="-dc#-tas-",
              type = "latex")


table <- str_replace_all(table, "\\^", "")
table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")
write.table(table[11:(length(table)-2)], col.names = F, row.names = F, quote = FALSE, paste0("./Tables/LaTeX/", title, sample, analysis, return, "Clean_fe.tex"))
  
stargazer(ols_m1, ols_m2, no.space=TRUE, align=TRUE, digits = 4, digits.extra = 0,  dep.var.caption  = "",
            dep.var.labels = "Sale", object.names=FALSE, model.numbers=FALSE,  model.names=FALSE, column.labels = c("(1)", "(2)", "(3)"), 
              covariate.labels=Covariate_Labels,
            add.lines = list(c("Years From Purchase Quintics", "YES", "YES"),
                             c("Property Characteristics", "YES","NO"),
                             c( "Property FE", "NO", "YES"), 
                             c( "Quarter FE", "NO", "YES")),
              omit.stat=c("ser","f","adj.rsq"), omit=c("Purch_duration2", "Purch_duration3", "Purch_duration4", "Purch_duration5", "Type", "New", "Quality100000", "Region"), 
              font.size = "footnotesize", omit.table.layout = "n", column.sep.width = "-15pt", 
              type = filetype, out=paste0("./Tables/", title, sample, analysis, return, "_fe.", out))
    

}





MS_ols <- function(dt, Ref_price, Ref_duration, title, sample, return){
  
  # title e.g. "Purchase", "Peak", "Neighbour"
  # sample e.g. "Incl3yr", "Excl3yr"
  # format short (excl property characteristics) or long

  
  DT <- regression_variables(dt, Ref_price, Ref_duration, return)

ols_m3 <- felm(sold.dummy ~ Price_unrealgainYN  + Purch_duration + Purch_duration2 +  Purch_duration3 + Purch_duration4 + Purch_duration5 + Type + New + Quality100000 + Region + Price_unrealgain.pos + Price_unrealgain.neg | 0|0|  Property_Id + YrQtr , data =  DT, na.action=na.exclude)  

ols_m2 <- felm(sold.dummy ~ Price_unrealgainYN  + Purch_duration + Purch_duration2 +  Purch_duration3 + Purch_duration4 + Purch_duration5 + Type + New + Quality100000 + Region| 0|0|  Property_Id + YrQtr , data =  DT, na.action=na.exclude)

ols_m1 <- felm(sold.dummy ~ Price_unrealgainYN |  0|0|  Property_Id + YrQtr, data =  DT, na.action=na.exclude)


MS_table(ols_m3, sample)
# Update median split summary table



if (return=="P") {
Covariate_Labels <- c("Gain Since Purchase = 1", "Years From Purchase", "Return Since Purchase > 0 (%)",  "Return Since Purchase < 0 (%)")
  
    } else if (return=="L") {
Covariate_Labels <- c("Gain Since Purchase = 1", "Years From Purchase", "Return Since Purchase > 0 (£100,000)",  "Return Since Purchase < 0 (£100,000)")
} else {
        stop("L or P")
        }


table <- stargazer(ols_m1, ols_m2, ols_m3,  no.space=TRUE, align=TRUE, digits = 4, digits.extra = 0,  dep.var.caption  = "",
            dep.var.labels = "Sale", object.names=FALSE, model.numbers=FALSE,  model.names=FALSE, column.labels = c("(1)", "(2)", "(3)", "(4)"), 
              covariate.labels=Covariate_Labels,
            add.lines = list(c("Years From Purchase Quintics", "NO", "YES", "YES"), 
                             c( "Property Characteristics", "NO", "YES", "YES")),
              omit.stat=c("ser","f","adj.rsq"), omit=c("Purch_duration2", "Purch_duration3", "Purch_duration4", "Purch_duration5", "Type", "New", "Quality100000", "Region"), 
              float = F, omit.table.layout = "n", column.sep.width = "-15pt", table.layout="-dc#-tas-",
              type = "latex")


table <- str_replace_all(table, "\\^", "")
table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")
write.table(table[11:(length(table)-2)], col.names = F, row.names = F, quote = FALSE, paste0("./Tables/LaTeX/", title, sample, analysis, return, "Clean_ols.tex"))
  
stargazer(ols_m1, ols_m2, ols_m3,  no.space=TRUE, align=TRUE, digits = 4, digits.extra = 0,  dep.var.caption  = "",
            dep.var.labels = "Sale", object.names=FALSE, model.numbers=FALSE,  model.names=FALSE, column.labels = c("(1)", "(2)", "(3)", "(4)"), 
              covariate.labels=Covariate_Labels,
            add.lines = list(c("Years From Purchase Quintics", "NO", "YES", "YES"), 
                             c( "Property Characteristics", "NO", "YES", "YES")),
              omit.stat=c("ser","f","adj.rsq"), omit=c("Purch_duration2", "Purch_duration3", "Purch_duration4", "Purch_duration5", "Type", "New", "Quality100000", "Region"), 
              font.size = "footnotesize", omit.table.layout = "n", column.sep.width = "-15pt", 
              type = filetype, out=paste0("./Tables/", title, sample, analysis, return, "_ols.", out))
    

}

```


***
# Table 1: OLS (Sign & magnitude of gains)


```{r cache=FALSE, eval=TRUE, echo=FALSE}


Purchase_ols(Resold.indexed.regress, "PriceA", "DurationVal", "Purchase","Sign")

```

***

# Table 2: OLS & FE (Magnitude of gains)

## Return as £100,000


```{r cache=FALSE, eval=TRUE, echo=FALSE}


Purchase_both(Resold.indexed.regress, "PriceA", "DurationVal", "Purchase","Magnitude", "L")

```

## Return as %


```{r cache=FALSE, eval=TRUE, echo=FALSE}


Purchase_both(Resold.indexed.regress, "PriceA", "DurationVal", "Purchase","Magnitude", "P")

```



***

# Median splits


```{r cache=FALSE, eval=TRUE, echo=FALSE}

MS_summary <- data.table(Description="Dummy", Purchase_coeff="X",  Purchase_se="X", Constant_coeff="X", Constant_se="X")

fwrite(MS_summary, paste0("./Tables/MS_summary_raw.", analysis, ".csv"))

```




## Sales (Return as £100,000)

### Above median


```{r cache=FALSE, eval=TRUE, echo=FALSE}

MS_ols(Resold.indexed.regress[Transactions_band=="High",], "PriceA", "DurationVal", "Purchase", "Sales_high", "L")

```



### Below median


```{r cache=FALSE, eval=TRUE, echo=FALSE}


MS_ols(Resold.indexed.regress[Transactions_band=="Low",], "PriceA", "DurationVal", "Purchase", "Sales_low", "L")

```

***
## Housing stock (Return as £100,000)


### Above median

```{r cache=FALSE, eval=TRUE, echo=FALSE}

MS_ols(Resold.indexed.regress[Stock_band=="High",], "PriceA", "DurationVal", "Purchase", "Stock_high", "L")


```


###  Below median

```{r cache=FALSE, eval=TRUE, echo=FALSE}

MS_ols(Resold.indexed.regress[Stock_band=="Low",], "PriceA", "DurationVal", "Purchase", "Stock_low", "L")


```


***
## LTV% (Return as £100,000)
### Above median
```{r cache=FALSE, eval=TRUE, echo=FALSE}

MS_ols(Resold.indexed.regress[LTV_band=="High",], "PriceA", "DurationVal", "Purchase", "LTV_high", "L")


```


###  Below median
```{r cache=FALSE, eval=TRUE, echo=FALSE}

MS_ols(Resold.indexed.regress[LTV_band=="Low",], "PriceA", "DurationVal", "Purchase", "LTV_low", "L")

```

# Median splits - Additional


```{r cache=FALSE, eval=TRUE, echo=FALSE}
Resold.indexed.regress <- Resold.indexed.regress[, Time_purch_band := factor(ifelse(DurationVal==median(DurationVal), "Equals median", ifelse(DurationVal>median(DurationVal), "High", "Low")))]

```


## Time since Purchase (Return as £100,000)

### Above median

```{r cache=FALSE, eval=TRUE, echo=FALSE}

MS_ols(Resold.indexed.regress[Time_purch_band=="High",], "PriceA", "DurationVal", "Purchase", "TimePurchase_high", "L")

```


### Below median

```{r cache=FALSE, eval=TRUE, echo=FALSE}

MS_ols(Resold.indexed.regress[Time_purch_band=="Low",], "PriceA", "DurationVal", "Purchase", "TimePurchase_low", "L")

```



# Table 4: Median Splits summary

```{r cache=FALSE, eval=TRUE, echo=FALSE}

MS_summary <- fread(paste0("./Tables/MS_summary_raw.", analysis, ".csv"))

(MS_summary <- MS_summary[-1])

fwrite(MS_summary, paste0("./Tables/Purchase_MS_summary_", analysis, ".csv"))

```

 


***


```{r cache=FALSE, eval=TRUE, echo=FALSE}
Sys.time() - Time

```

